library(dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library(Seurat)
## Attaching SeuratObject
## Attaching sp
library(patchwork)
library(Seurat)
library(devtools)
## Loading required package: usethis

Loading the data

The data were taken from PANGLAODB. It a sample of substanzia nigra form an adult donor.

note: sn stands for Substantia Nigra

load("/Users/carlomanenti/fasulla/SRA850958_SRS4386112.sparse.RData")
rownames(sm) <- sapply(strsplit(rownames(sm),"_"), `[`, 1)
sn.data <- sm 

sn <- CreateSeuratObject(counts = sn.data, project = "snsc", min.cells = 3, min.features = 200)
## Warning: Non-unique features (rownames) present in the input matrix, making
## unique

Cell quality control

Checking presence of mithocondrial genes (MT-…)

grep("^MT-",rownames(sn),value = TRUE)
##  [1] "MT-ATP6" "MT-ATP8" "MT-CO1"  "MT-CO2"  "MT-CO3"  "MT-CYB"  "MT-ND1" 
##  [8] "MT-ND2"  "MT-ND3"  "MT-ND4"  "MT-ND4L" "MT-ND5"  "MT-ND6"  "MT-RNR1"
## [15] "MT-RNR2" "MT-TC"   "MT-TL1"  "MT-TP"
sn[["percent.mt"]] <- PercentageFeatureSet(sn, pattern = "^MT-")

Ribosomal protein genes (RPL|S…)

grep("^RP[LS]",rownames(sn),value = TRUE)
 
sn[["percent.rbp"]] <- PercentageFeatureSet(sn, pattern = "^RP[LS]")

Visualize QC metrics as violin plots

(the number of unique genes, called features and of total molecules are automatically created during the CreateSeuratObject)

VlnPlot(sn, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rbp"), ncol = 4)

Same plot but without dots

VlnPlot(sn, features = c("nFeature_RNA", "nCount_RNA", "percent.mt","percent.rbp"), ncol = 4, pt.size=0)

We can check if the different parameters are correlated with one another by using the FeatureScatter plots:

• Correlation between % of mitochondrial RNA and number of reads • Correlation between number of genes and number of reads • Correlation between % of rRNA and number of reads

FeatureScatter(sn, feature1 = "nCount_RNA", feature2 = "percent.mt")

FeatureScatter(sn, feature1 = "nCount_RNA", feature2 = "nFeature_RNA")

FeatureScatter(sn, feature1 = "nCount_RNA", feature2 = "percent.rbp")

Defining a thresholds for cell quality control

In this case the number of genes must be between 200 and 3000 and the mithocondrial DNA/RNA percentage must be lower than 5%

sn 
## An object of class Seurat 
## 27364 features across 7055 samples within 1 assay 
## Active assay: RNA (27364 features, 0 variable features)
sn <- subset(sn, subset = nFeature_RNA > 200 & nFeature_RNA < 3000 & percent.mt < 5)
sn
## An object of class Seurat 
## 27364 features across 6434 samples within 1 assay 
## Active assay: RNA (27364 features, 0 variable features)

Data Normalization

sn <- NormalizeData(sn, normalization.method = "LogNormalize", scale.factor = 10000)

Cell cycle effect

Seurat contains a pre-computed list of cell cycle specific genes that can be used to guess which CC phase the cell is in Since our sample are brain cells this probably won’t be too useful.

sn <- CellCycleScoring(sn, s.features = cc.genes.updated.2019$s.genes, g2m.features = cc.genes.updated.2019$g2m.genes, set.ident = TRUE)
## Warning: The following features are not present in the object: UNG, DTL, UHRF1,
## UBR7, EXO1, USP1, E2F8, not searching for symbol synonyms
## Warning: The following features are not present in the object: UBE2C, NDC80,
## MKI67, CKAP2L, AURKB, HJURP, CDC25C, DLGAP5, CENPA, not searching for symbol
## synonyms
sn[[]]

Keeping only the 2000 most variable genes

We keep a subset of genes with the greathest variability of expression across cells

sn <- FindVariableFeatures(sn, selection.method = "vst", nfeatures = 2000)

top10 <- head(VariableFeatures(sn), 10)

plot1 <- VariableFeaturePlot(sn)
plot2 <- LabelPoints(plot = plot1, points = top10, repel = TRUE)
## When using repel, set xnudge and ynudge to 0 for optimal results
plot2
## Warning: Transformation introduced infinite values in continuous x-axis
## Warning: Removed 18 rows containing missing values (geom_point).

Scaling the data

We shift the expression of each gene so that the mean is 0 and the variance is 1 across cells

all.genes <- rownames(sn)
sn <- ScaleData(sn, features = all.genes)
## Centering and scaling data matrix

Dimensional reduction

We perform PCA on the 2000 most variable genes

sn <- RunPCA(sn, features = VariableFeatures(object = sn))
## PC_ 1 
## Positive:  SPARCL1, ATP1A2, NDUFA4L2, RGS5, IFITM3, EPAS1, TSC22D1, GFAP, TIMP3, MT2A 
##     IFITM1, ABCG2, ITIH5, VIM, CLDN5, FLT1, IGFBP7, GNG11, IFI27, ITM2C 
##     HLA-E, HIGD1B, COBLL1, DCN, ATP1B2, ARHGAP29, CAVIN2, RBMS3, EGFL7, CA4 
## Negative:  PLP1, MBP, CNP, CRYAB, MAG, APLP1, ERMN, MOBP, CLDN11, TMEM144 
##     TF, SPP1, GPM6B, FEZ1, TUBA1A, SOX2-OT, CLDND1, PIP4K2A, SLC44A1, ENPP2 
##     SCD, QDPR, ATP1B1, ST18, RAPGEF5, KLK6, ANLN, S100B, STMN1, CNDP1 
## PC_ 2 
## Positive:  SLC14A1, FGFR3, IQCA1, AQP4, AQP1, PTPRZ1, RYR3, SLC1A2, SLC6A11, GJA1 
##     SYNE1, NTRK2, RGMA, CAMK2G, MIAT, SDC4, TNC, BCAN, FOS, TRPM3 
##     APOE, ETNPPL, ID2, EFEMP1, AEBP1, AGT, GPM6A, PFKP, ID4, FAT3 
## Negative:  NDUFA4L2, RGS5, IFITM1, EPAS1, DCN, ITIH5, VIM, ABCG2, FLT1, COBLL1 
##     GNG11, MYL9, CAVIN2, CLDN5, ADGRF5, IGFBP7, CA4, ABCB1, HIGD1B, NOTCH3 
##     ARHGAP29, RERGL, SLC6A12, ESAM, EGFL7, SLC38A5, IFITM3, IFI27, B2M, HERC2P3 
## PC_ 3 
## Positive:  MEG3, SYNPR, SYT1, HGNC:24955, SCG2, GAD2, SLC12A5, GRIK1, GAD1, CELF4 
##     ZNF385D, LRRC4C, GPM6A, KCNIP4-IT1, THY1, VSTM2L, ATP1A3, FGF14-IT1, KCNJ6, SNAP25 
##     SCN3B, SYN2, SLC8A1, PTPRN, SPHKAP, SLC4A10, GRIP2, SV2A, RAB3C, TSPAN7 
## Negative:  CD74, LPAR6, CSF3R, RNA5SP151, ADAM28, ITGAX, CSF1R, KCNMB1, C3, HLA-DRA 
##     A2M, MED12L, RNASET2, CX3CR1, GFAP, P2RY13, CLEC7A, LAPTM5, HLA-E, C1QB 
##     SLCO2B1, INPP5D, ADAP2, C1QC, LST1, PTPRC, SERPINB9, SLC14A1, SAMSN1, HLA-DRB1 
## PC_ 4 
## Positive:  CD74, CSF3R, RNA5SP151, LPAR6, ADAM28, KCNMB1, CSF1R, ITGAX, HLA-DRA, MED12L 
##     CX3CR1, SAT1, ARHGAP24, C1QB, RNASET2, P2RY13, CLEC7A, LAPTM5, SRGAP2, C1QC 
##     MAF, SLCO2B1, C3, PTPRC, ADAP2, MEF2C, TMEM52B, LST1, TMC8, TNFRSF13C 
## Negative:  MT2A, SLC14A1, SYNE1, PTGDS, AQP4, GJA1, IQCA1, FGFR3, AQP1, ATP1A2 
##     MT1E, GFAP, MT1X, SDC4, AGT, RYR3, NTRK2, RGMA, ATP1B2, SLC6A11 
##     SORBS1, ETNPPL, MT2P1, TNC, MT1G, AEBP1, SLC1A2, CAMK2G, SPARCL1, EDNRB 
## PC_ 5 
## Positive:  SYNPR, SYT1, GAD2, SLC12A5, VSTM2L, CELF4, GRIP2, SCN3B, CAMK2B, OTX2 
##     GABBR2, SYN2, PTPRN, GRIN1, RAB3B, PGM2L1, SNCB, TAC1, ZNF385D, KIT 
##     AC016717.2, SLC14A1, GUCY1A3, SYT7, IGSF3, NRGN, IQCA1, SNAP25, RAB3A, ATP2B2 
## Negative:  VCAN, B3GNT7, TNR, PDGFRA, PTPRZ1, COL9A1, CRISPLD2, LRRC4C, DSCAM, BCAN 
##     MMP16, MEG3, PLEKHH2, NEU4, PCDH15, PHLDA1, GPR17, ASCL1, FERMT1, CSPG5 
##     SLC35F1, SEMA5A, LUZP2, SOX6, SNTG1, CSPG4, OLIG2, SULF2, CD82, MARCKS
print(sn[["pca"]], dims = 1:5, nfeatures = 5)
## PC_ 1 
## Positive:  SPARCL1, ATP1A2, NDUFA4L2, RGS5, IFITM3 
## Negative:  PLP1, MBP, CNP, CRYAB, MAG 
## PC_ 2 
## Positive:  SLC14A1, FGFR3, IQCA1, AQP4, AQP1 
## Negative:  NDUFA4L2, RGS5, IFITM1, EPAS1, DCN 
## PC_ 3 
## Positive:  MEG3, SYNPR, SYT1, HGNC:24955, SCG2 
## Negative:  CD74, LPAR6, CSF3R, RNA5SP151, ADAM28 
## PC_ 4 
## Positive:  CD74, CSF3R, RNA5SP151, LPAR6, ADAM28 
## Negative:  MT2A, SLC14A1, SYNE1, PTGDS, AQP4 
## PC_ 5 
## Positive:  SYNPR, SYT1, GAD2, SLC12A5, VSTM2L 
## Negative:  VCAN, B3GNT7, TNR, PDGFRA, PTPRZ1
VizDimLoadings(sn, dims = 1:2, reduction = "pca")

VizDimLoadings(sn, dims = 3:4, reduction = "pca")

VizDimLoadings(sn, dims = 4:5, reduction = "pca")

Visualizing cells using the first two Principal Components (PC)

We project the cells in the first two principal components, the cells are coluored according to their cell cycle phase

DimPlot(sn, reduction = "pca")

The cell cycle seems to be ininfluent!

Selecting the number of PCs

With ndims we can choose how many PC to plot

ElbowPlot(sn, ndims= 30)

Choosing how many dimensions to use can vary depending on the method we choose. In general it’s better to keep all PC until 70/75% of the variance is explained

pc.touse <- (sn$pca@stdev)^2
pc.touse <- pc.touse/sum(pc.touse)
pc.touse <- cumsum(pc.touse)[1:50]
pc.touse <- min(which(pc.touse>=0.85))
pc.touse
## [1] 27

In our case we opted for 11 PCs (standard deviation lower than 2) components and 20 PCs (At least 80% of the variance is explained).

Clustering with 11 PCs

The first step uses the FindNeighbors function, which constructs a KNN graph based on the euclidean distance in PCA space and refines the edge weights using the Jaccard similarity

sn.11.5 <- FindNeighbors(sn, dims = 1:11)
## Computing nearest neighbor graph
## Computing SNN

To cluster the cell we use the FindClusters function, which uses the Louvain algorithm to iteratively group cells together

sn.11.5 <- FindClusters(sn.11.5, resolution = 0.5)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 6434
## Number of edges: 227954
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.9219
## Number of communities: 11
## Elapsed time: 0 seconds
sn.11.5 <- sn.11.5

T Stochastic Neighbor Embedding (TSNE)

We plot the clusters using TSNE

sn.11.5.tsne <- RunTSNE(sn.11.5, dims=1:11)
DimPlot(sn.11.5.tsne, reduction = "tsne")

## Uniform Manifold Approximation and Projection(UMAP), which is generally preferred but requires the installation of a new package

sn.11.5.UMAP <- RunUMAP(sn.11.5, dims = 1:11)
## Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
## To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
## This message will be shown once per session
## 12:33:50 UMAP embedding parameters a = 0.9922 b = 1.112
## 12:33:50 Read 6434 rows and found 11 numeric columns
## 12:33:50 Using Annoy for neighbor search, n_neighbors = 30
## 12:33:50 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 12:33:50 Writing NN index file to temp file /var/folders/nx/9kjdn6x158927qbzy599pcg00000gn/T//RtmpGUpfMS/file1c09126c25a8
## 12:33:50 Searching Annoy index using 1 thread, search_k = 3000
## 12:33:52 Annoy recall = 100%
## 12:33:52 Commencing smooth kNN distance calibration using 1 thread
## 12:33:52 Initializing from normalized Laplacian + noise
## 12:33:53 Commencing optimization for 500 epochs, with 263510 positive edges
## 12:34:05 Optimization finished
DimPlot(sn.11.5.UMAP, reduction = "umap")

We can also check whether some of the critial quality paramteres influenced the clustering we got

VlnPlot(sn.11.5.UMAP,features="nCount_RNA")

VlnPlot(sn.11.5.UMAP,features="nFeature_RNA")

VlnPlot(sn.11.5.UMAP,features="percent.mt")

VlnPlot(sn.11.5.UMAP,features="percent.rbp")

or the cell cycle

library(ggplot2)
library(dbplyr)
## 
## Attaching package: 'dbplyr'
## The following objects are masked from 'package:dplyr':
## 
##     ident, sql
sn.11.5@meta.data %>%
  group_by(seurat_clusters,Phase) %>%
  count()  %>% 
  group_by(seurat_clusters) %>%
  mutate(percent=100*n/sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x=seurat_clusters,y=percent, fill=Phase)) +
  geom_col() + 
  ggtitle("Percentage of cell cycle phases per cluster")

All in all there is quite some mt DNA in cluster 6 and 7 that may lead to probelms in indetifying marker genes. For all the other quality check the clustering seems reasonable.

Clustering with 27 PCs

Repeating the analysis for 27 PCs

sn.27.5 <- FindNeighbors(sn, dims = 1:27)
## Computing nearest neighbor graph
## Computing SNN

To cluster the cell we use the FindClusters function, which uses the Louvain algorithm to iteratively group cells together

sn.27.5 <- FindClusters(sn.27.5, resolution = 1)
## Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
## 
## Number of nodes: 6434
## Number of edges: 299775
## 
## Running Louvain algorithm...
## Maximum modularity in 10 random starts: 0.8556
## Number of communities: 13
## Elapsed time: 0 seconds
sn.27.5 <- sn.27.5

T Stochastic Neighbor Embedding (TSNE)

We plot the clusters using TSNE

sn.27.5.tsne <- RunTSNE(sn.27.5, dims=1:27)
DimPlot(sn.27.5.tsne, reduction = "tsne")

## Uniform Manifold Approximation and Projection(UMAP), which is generally preferred but requires the installation of a new package

sn.27.5.UMAP <- RunUMAP(sn.27.5, dims = 1:27)
## 12:34:26 UMAP embedding parameters a = 0.9922 b = 1.112
## 12:34:26 Read 6434 rows and found 27 numeric columns
## 12:34:26 Using Annoy for neighbor search, n_neighbors = 30
## 12:34:26 Building Annoy index with metric = cosine, n_trees = 50
## 0%   10   20   30   40   50   60   70   80   90   100%
## [----|----|----|----|----|----|----|----|----|----|
## **************************************************|
## 12:34:27 Writing NN index file to temp file /var/folders/nx/9kjdn6x158927qbzy599pcg00000gn/T//RtmpGUpfMS/file1c093548d44c
## 12:34:27 Searching Annoy index using 1 thread, search_k = 3000
## 12:34:28 Annoy recall = 100%
## 12:34:28 Commencing smooth kNN distance calibration using 1 thread
## 12:34:29 Initializing from normalized Laplacian + noise
## 12:34:29 Commencing optimization for 500 epochs, with 287734 positive edges
## 12:34:42 Optimization finished
DimPlot(sn.27.5.UMAP, reduction = "umap")

We can also check whether some of the critial quality paramteres influenced the clustering we got

VlnPlot(sn.27.5.UMAP,features="nCount_RNA")

VlnPlot(sn.27.5.UMAP,features="nFeature_RNA")

VlnPlot(sn.27.5.UMAP,features="percent.mt")

VlnPlot(sn.27.5.UMAP,features="percent.rbp")

## or the cell cycle

library(ggplot2)
library(dbplyr)

sn.27.5@meta.data %>%
  group_by(seurat_clusters,Phase) %>%
  count()  %>% 
  group_by(seurat_clusters) %>%
  mutate(percent=100*n/sum(n)) %>%
  ungroup() %>%
  ggplot(aes(x=seurat_clusters,y=percent, fill=Phase)) +
  geom_col() + 
  ggtitle("Percentage of cell cycle phases per cluster")

Having a higher resolution leads to a higher number of clusters. And in this case the definition of clusters is impacted also by using many PCs to explain the vast majority of variance. This way we probably ended up defining the cluster also by noise and in a pure techinical way than a meaningful way.

Overlapping

## **Overlapping**
library(ggplot2)
pbmc16_02 <- sn.11.5.UMAP 
pbmc20_001 <- sn.27.5.UMAP

Clusters_1 <- 10
CLusters_2 <- 12
overlap <- data.frame(cluster_res1 = NA, cluster_res2 = NA, number_cells_common =NA) 

# with PC=16 and resolution=0.2 I obtain clusters from 0 to 11, with PC=20 and resolution=0.01 I obtain clusters from 0 to 6 

for (i in 0:Clusters_1) { 
for (j in 0:CLusters_2) { 
overlap <- rbind(overlap, c(i,j,length(intersect(rownames(as.data.frame(Idents(pbmc16_02)[Idents(pbmc16_02) == i])),rownames(as.data.frame(Idents(pbmc20_001)[Idents(pbmc20_001) == j])))))) 
} 
} 

overlap <- overlap[c(-1),] 

# dotplot 
ggplot(overlap, aes(x= cluster_res1, y= cluster_res2, colour=number_cells_common, size=number_cells_common)) + geom_point() + xlab('Clustering 1 (11 PCs and 0.5 resolution)') + ylab('Clustering 2 (27 PCs and 1 resolution') + ggtitle('Overlapping cells') +
theme_classic() + scale_x_continuous(breaks = 0:12) + scale_y_continuous(breaks = 0:12) 

The evalutation of overlapping cells in each cluster further asses the problem of using to many PCs and a too high resolution, since many clusters are probably split only due to noise.

Number of cell for each cluster

# 
cat('11PCs and 0.5 resolution')
## 11PCs and 0.5 resolution
for (cluster_n in seq(0, Clusters_1 )){
  n_cells<- length(Idents(sn.11.5.UMAP)[Idents(pbmc20_001) == cluster_n])
  cat('Cluster', cluster_n, 'is composed of', n_cells, 'cells \n')
  
}
## Cluster 0 is composed of 932 cells 
## Cluster 1 is composed of 784 cells 
## Cluster 2 is composed of 747 cells 
## Cluster 3 is composed of 708 cells 
## Cluster 4 is composed of 649 cells 
## Cluster 5 is composed of 644 cells 
## Cluster 6 is composed of 622 cells 
## Cluster 7 is composed of 485 cells 
## Cluster 8 is composed of 291 cells 
## Cluster 9 is composed of 208 cells 
## Cluster 10 is composed of 200 cells
cat(' \n \n \n')
cat('27PCs and 1 resolution')
## 27PCs and 1 resolution
for (cluster_n in seq(0, CLusters_2 )){
  n_cells<- length(Idents(sn.27.5.UMAP)[Idents(pbmc20_001) == cluster_n])
  cat('Cluster', cluster_n, 'is composed of', n_cells, 'cells \n')
}
## Cluster 0 is composed of 932 cells 
## Cluster 1 is composed of 784 cells 
## Cluster 2 is composed of 747 cells 
## Cluster 3 is composed of 708 cells 
## Cluster 4 is composed of 649 cells 
## Cluster 5 is composed of 644 cells 
## Cluster 6 is composed of 622 cells 
## Cluster 7 is composed of 485 cells 
## Cluster 8 is composed of 291 cells 
## Cluster 9 is composed of 208 cells 
## Cluster 10 is composed of 200 cells 
## Cluster 11 is composed of 124 cells 
## Cluster 12 is composed of 40 cells

Also the number of cell for each cluster is much more variable using 27PCs and 1 as resolution. This may also contribute to meaningless clusters.

Finding marker genes for 11PCs clustering

Seurat also includes a function that can be used to find genes over expressed between two clusters or overexpressed in one cluster with respect to all the others

The one vs all clusters analyses can be iterated automatically

sn.11.5 <- sn.11.5.UMAP
sn.markers <- FindAllMarkers(sn.11.5, only.pos = TRUE, min.pct = 0.25, logfc.threshold = 0.25)
## Calculating cluster 0
## Calculating cluster 1
## Calculating cluster 2
## Calculating cluster 3
## Calculating cluster 4
## Calculating cluster 5
## Calculating cluster 6
## Calculating cluster 7
## Calculating cluster 8
## Calculating cluster 9
## Calculating cluster 10
sn.markers %>%
    group_by(cluster) %>%
    slice_max(n = 5, order_by = avg_log2FC)
## # A tibble: 55 × 7
## # Groups:   cluster [11]
##        p_val avg_log2FC pct.1 pct.2 p_val_adj cluster gene   
##        <dbl>      <dbl> <dbl> <dbl>     <dbl> <fct>   <chr>  
##  1 2.76e-199       2.41 0.631 0.325 7.56e-195 0       STMN4  
##  2 0               2.40 0.937 0.615 0         0       S100B  
##  3 0               2.28 0.974 0.564 0         0       CRYAB  
##  4 0               2.19 0.889 0.515 0         0       TUBA1A 
##  5 6.77e-250       2.16 0.74  0.413 1.85e-245 0       STMN1  
##  6 0               2.96 0.707 0.146 0         1       SYNE1  
##  7 0               2.90 0.547 0.062 0         1       FGFR3  
##  8 0               2.83 0.564 0.064 0         1       SLC14A1
##  9 0               2.77 0.488 0.053 0         1       IQCA1  
## 10 0               2.74 0.445 0.036 0         1       SLC6A11
## # … with 45 more rows

Heatmap

sn.markers %>%
    group_by(cluster) %>%
    top_n(n = 10, wt = avg_log2FC) -> top10
DoHeatmap(sn.11.5, features = top10$gene) + NoLegend()

Looking at the Heatmap we can see that clusters 2 and 3 show quite a lot of similarities. Also clusters 1 and 8 are similar and clusters 5 and 10.

Going in depth

Oligodendrocytes

We want to asses the cell type shared by cluster 0, 2 and 3.

cluster0AND2AND3.markers <- FindMarkers(sn.11.5, ident.1 = c(0,2,3), min.pct = 0.25, test.use = "wilcox")
cluster0AND2AND3.markers <- cluster0AND2AND3.markers[order(-cluster0AND2AND3.markers$avg_log2FC), ] 
head(cluster0AND2AND3.markers, n = 20)
##         p_val avg_log2FC pct.1 pct.2 p_val_adj
## CRYAB       0   3.238763 0.985 0.397         0
## MAG         0   3.142866 0.847 0.152         0
## CNP         0   3.005078 0.912 0.275         0
## ERMN        0   2.960278 0.814 0.128         0
## PLP1        0   2.952570 0.996 0.594         0
## CLDN11      0   2.852663 0.674 0.134         0
## TUBA1A      0   2.708940 0.921 0.347         0
## FEZ1        0   2.674523 0.783 0.181         0
## STMN4       0   2.622196 0.725 0.140         0
## APLP1       0   2.597775 0.784 0.155         0
## S100B       0   2.575254 0.934 0.492         0
## QDPR        0   2.543784 0.711 0.187         0
## GJB1        0   2.451394 0.557 0.062         0
## TF          0   2.439317 0.879 0.316         0
## STMN1       0   2.411306 0.802 0.243         0
## RAPGEF5     0   2.406229 0.661 0.107         0
## PMP22       0   2.397809 0.638 0.118         0
## KLK6        0   2.286000 0.470 0.044         0
## CLDND1      0   2.261389 0.725 0.153         0
## SPP1        0   2.253051 0.913 0.344         0

MAG, CNP, ERMN -> Oligodendrocytes

It seems that they are all oligodendrocytes

Let’s see the genes making the difference between those two clusters: Genes overexpressed in cluster 0 vs cluster 2 and 3

cluster0vs23.markers <- FindMarkers(sn.11.5, ident.1 = 0, ident.2 = c(2, 3), min.pct = 0.25, test.use = "wilcox") 
cluster0vs23.markers <- cluster0vs23.markers[order(-cluster0vs23.markers$avg_log2FC),] 
head(cluster0vs23.markers, n = 10)
##                  p_val avg_log2FC pct.1 pct.2     p_val_adj
## TUBB2B    9.811298e-18   1.568625 0.421 0.389  2.684764e-13
## LGALS1    8.818528e-16   1.523813 0.444 0.451  2.413102e-11
## MYLK      2.714519e-04   1.414340 0.382 0.481  1.000000e+00
## S100B    1.435508e-166   1.354370 0.937 0.932 3.928124e-162
## STMN4     6.619515e-38   1.341009 0.631 0.803  1.811364e-33
## MGST3     8.322499e-01   1.203809 0.357 0.536  1.000000e+00
## SCRG1     1.411455e-01   1.186089 0.261 0.386  1.000000e+00
## SERPINI1  5.000873e-02   1.143643 0.294 0.463  1.000000e+00
## STMN1     1.390251e-66   1.126726 0.740 0.853  3.804284e-62
## GPD1      5.717462e-04   1.097975 0.217 0.353  1.000000e+00

The main DE genes are linked to the cytoskeleton of the cell and to Ca2+ mediated response. S100B -> Small subset of OPC modulation of OPC synapsis. May act as precursors of AST (astrocyets) ’’’ astrocytic marker S100β, pointing to a role of S100β in the modulation of OPCs synapses or identifying a subset of cells that may act as precursors for astrocytes ’’’ cit

And overexpressed in cluster 2 vs cluster 0 and 3

cluster2vs03.markers <- FindMarkers(sn.11.5, ident.1 = 2, ident.2 = c(0, 3), min.pct = 0.25, test.use = "wilcox") 
cluster2vs03.markers <- cluster2vs03.markers[order(-cluster2vs03.markers$avg_log2FC),] 
head(cluster2vs03.markers, n = 10)
##                    p_val avg_log2FC pct.1 pct.2     p_val_adj
## GAPDH      1.918641e-187  1.4246237 0.840 0.178 5.250169e-183
## PRDX1      8.965040e-176  1.1911357 0.842 0.179 2.453194e-171
## KLK6       9.883987e-158  1.1897185 0.927 0.285 2.704654e-153
## DNAH17-AS1 1.422112e-103  1.1518760 0.421 0.062  3.891467e-99
## FTLP3      4.365625e-147  1.1409274 0.718 0.149 1.194610e-142
## GAPDHP1    6.460215e-152  1.0644227 0.794 0.182 1.767773e-147
## C2orf82    1.232992e-113  1.0113819 0.334 0.022 3.373960e-109
## MARCKSL1   2.964506e-119  1.0058306 0.992 0.517 8.112075e-115
## PTGDS      1.831073e-117  0.9735822 0.997 0.606 5.010548e-113
## AC008417.1  6.097955e-99  0.9121903 0.448 0.074  1.668644e-94

GAPDH -> glycolysis PRDX1 -> protection against oxidative stress KLK6 -> Degrades alpha-synuclein and prevents its polymerization

Satellaite Oligodendrocytes

And overexpressed in cluster 3 vs cluster 0 and 2

cluster3vs02.markers <- FindMarkers(sn.11.5, ident.1 = 3, ident.2 = c(0, 2), min.pct = 0.25, test.use = "wilcox") 
cluster3vs02.markers <- cluster3vs02.markers[order(-cluster3vs02.markers$avg_log2FC),] 
head(cluster3vs02.markers, n = 10)
##                   p_val avg_log2FC pct.1 pct.2     p_val_adj
## OPALIN    1.171272e-291   2.829309 0.754 0.068 3.205069e-287
## ITM2A     1.048422e-119   1.855625 0.384 0.041 2.868902e-115
## SUN2       2.766859e-51   1.403976 0.337 0.113  7.571234e-47
## HHIP       1.906966e-84   1.360213 0.545 0.195  5.218221e-80
## CD82       4.269736e-65   1.324387 0.283 0.052  1.168371e-60
## QDPR      1.951641e-138   1.312583 0.969 0.621 5.340469e-134
## ANK3       2.268497e-87   1.281525 0.638 0.274  6.207516e-83
## LINC00844  5.641154e-69   1.275117 0.552 0.225  1.543645e-64
## MID1IP1    7.822136e-64   1.249336 0.578 0.279  2.140449e-59
## KIAA0930   4.752932e-65   1.226123 0.404 0.124  1.300592e-60

OPALIN -> Promotes oligodendrocyte terminal differentiation ITM2A -> Angiogenesis HHIP -> Myelin sheet organization ANK3 -> Myelin sheath organization MMID1IP1 -> lipid biosynthesis

Perivascular Oligodendrocytes

The strange case of cluster 6

cluster0AND2AND3AND6.markers <- FindMarkers(sn.11.5, ident.1 = c(0,2,3,6), min.pct = 0.25, test.use = "wilcox")
cluster0AND2AND3AND6.markers <- cluster0AND2AND3AND6.markers[order(-cluster0AND2AND3AND6.markers$avg_log2FC), ] 
head(cluster0AND2AND3AND6.markers, n = 10)
##        p_val avg_log2FC pct.1 pct.2 p_val_adj
## MBP        0   3.990324 0.962 0.275         0
## PLP1       0   3.713211 0.981 0.529         0
## CLDN11     0   3.629129 0.628 0.074         0
## MOBP       0   3.487979 0.775 0.099         0
## CNP        0   3.403986 0.841 0.222         0
## MAG        0   3.344376 0.746 0.119         0
## CRYAB      0   3.321436 0.907 0.362         0
## ERMN       0   3.300869 0.723 0.086         0
## APLP1      0   3.260704 0.719 0.097         0
## ST18       0   3.020800 0.575 0.040         0

They are all olygodendrocytes, so we need to further asses the subtype of cluster 6.

cluster6vs023.markers <- FindMarkers(sn.11.5, ident.1 = 6, ident.2 = c(0, 2, 3), min.pct = 0.25, test.use = "wilcox") 
cluster6vs023.markers <- cluster6vs023.markers[order(-cluster6vs023.markers$avg_log2FC),] 
head(cluster6vs023.markers, n = 10)
##                  p_val avg_log2FC pct.1 pct.2     p_val_adj
## NEAT1    2.573288e-108   1.512712 0.870 0.698 7.041545e-104
## MTATP6P1  1.614773e-19   1.481639 0.407 0.310  4.418664e-15
## MT-ND4L   1.548508e-13   1.466584 0.326 0.243  4.237336e-09
## MIR219A2  7.189556e-37   1.454766 0.580 0.458  1.967350e-32
## MT-RNR2   1.453683e-92   1.453312 0.852 0.711  3.977858e-88
## CERCAM    6.247712e-23   1.428932 0.453 0.341  1.709624e-18
## WSB1      1.721366e-42   1.346586 0.626 0.497  4.710345e-38
## MAP4K4    3.728394e-18   1.340851 0.388 0.285  1.020238e-13
## AATK      1.073924e-08   1.305483 0.272 0.207  2.938686e-04
## ERBIN     1.417623e-05   1.255003 0.264 0.227  3.879184e-01

cluster 6 vs ALL CERCAM: Myelin sheath organization TTLL7: Myelin sheath organization TMEM165: Calcium/proton transporter involved in calcium and in lysosomal pH homeostasis. ATP8A1: Myelin sheath organization

Intrafascicular Oligodendrocytes

Astrocytes

repeating the analysis also for clusters 1 and 8

cluster1AND8.markers <- FindMarkers(sn.11.5, ident.1 = c(1, 8), min.pct = 0.25, test.use = "wilcox")
cluster1AND8.markers <- cluster1AND8.markers[order(-cluster1AND8.markers$avg_log2FC), ] 
head(cluster1AND8.markers, n = 20)
##         p_val avg_log2FC pct.1 pct.2 p_val_adj
## SLC14A1     0   3.585351 0.602 0.026         0
## AQP1        0   3.354460 0.512 0.030         0
## SYNE1       0   3.304413 0.710 0.113         0
## FGFR3       0   3.222259 0.547 0.034         0
## IQCA1       0   3.188155 0.506 0.023         0
## AQP4        0   3.155195 0.532 0.034         0
## MT2A        0   3.025081 0.845 0.304         0
## GJA1        0   2.975764 0.522 0.041         0
## ACOT11      0   2.910918 0.499 0.041         0
## GFAP        0   2.903876 0.759 0.188         0
## SDC4        0   2.880833 0.387 0.019         0
## RGMA        0   2.800576 0.406 0.022         0
## RYR3        0   2.790579 0.424 0.027         0
## APOE        0   2.717603 0.456 0.051         0
## SLC6A11     0   2.716400 0.402 0.023         0
## MT1X        0   2.693098 0.531 0.090         0
## DST         0   2.684294 0.911 0.393         0
## AHCYL1      0   2.684251 0.582 0.102         0
## AGT         0   2.670918 0.446 0.043         0
## MT1E        0   2.664751 0.552 0.097         0

SLC14A1, AQP1, SYNE1 -> Astrocytes

And overexpressed in cluster 1 vs cluster 8

cluster1vs8.markers <- FindMarkers(sn.11.5, ident.1 = 1, ident.2 = 8, min.pct = 0.25, test.use = "wilcox") 
cluster1vs8.markers <- cluster1vs8.markers[order(-cluster1vs8.markers$avg_log2FC),] 
head(cluster1vs8.markers, n = 20)
##                 p_val avg_log2FC pct.1 pct.2    p_val_adj
## SLC6A11  2.706412e-11  1.1372593 0.445 0.263 7.405826e-07
## SLC1A2   5.288953e-09  1.0810690 0.453 0.313 1.447269e-04
## GLUL     9.236793e-09  0.9860687 0.454 0.310 2.527556e-04
## APOC1    1.179102e-05  0.9298295 0.259 0.145 3.226496e-01
## NDRG2    2.894900e-12  0.8594817 0.702 0.616 7.921605e-08
## MIAT     3.441118e-03  0.7497609 0.320 0.259 1.000000e+00
## ACSS1    1.013476e-03  0.7398942 0.343 0.269 1.000000e+00
## SFXN5    2.990610e-02  0.6297845 0.337 0.306 1.000000e+00
## TTYH1    2.247265e-03  0.5741342 0.504 0.485 1.000000e+00
## CHD9     6.123444e-02  0.5734169 0.331 0.320 1.000000e+00
## LRP1B    3.017607e-02  0.5599995 0.263 0.215 1.000000e+00
## NTRK3    7.224036e-02  0.5357667 0.291 0.263 1.000000e+00
## KIAA0930 4.765375e-04  0.5270372 0.589 0.552 1.000000e+00
## MACF1    1.993998e-08  0.5215897 0.805 0.818 5.456377e-04
## MT-ND2   2.926346e-01  0.5186817 0.300 0.293 1.000000e+00
## BAZ2B    1.361101e-01  0.5130906 0.357 0.350 1.000000e+00
## NRXN1    1.035823e-01  0.4944489 0.261 0.232 1.000000e+00
## SLC1A3   7.389025e-03  0.4711984 0.481 0.461 1.000000e+00
## HIF3A    1.139567e-01  0.4705551 0.275 0.249 1.000000e+00
## FGFR3    1.630241e-02  0.4669772 0.547 0.545 1.000000e+00

GLUL (Glutamate-ammonia ligase): regulates the levels of toxic ammonia and converts neurotoxic glutamate to harmless glutamine SLC6A11: Terminates the action of GABA by its high affinity sodium-dependent reuptake into presynaptic terminals. Astrocytes type 2 (they promotes neuron survival and repair)

And overexpressed in cluster 8 vs cluster 1

cluster8vs1.markers <- FindMarkers(sn.11.5, ident.1 = 8, ident.2 = 1, min.pct = 0.25, test.use = "wilcox") 
cluster8vs1.markers <- cluster8vs1.markers[order(-cluster8vs1.markers$avg_log2FC),] 
head(cluster8vs1.markers, n = 20)
##                  p_val avg_log2FC pct.1 pct.2    p_val_adj
## ID3       3.291705e-40  1.9142349 0.374 0.066 9.007422e-36
## CPAMD8    2.275972e-44  1.9020350 0.290 0.025 6.227969e-40
## DPP10     4.358531e-52  1.7937343 0.394 0.049 1.192669e-47
## CD44      7.800680e-44  1.7148875 0.333 0.042 2.134578e-39
## MT1G      4.438674e-29  1.5739861 0.556 0.233 1.214599e-24
## COL21A1   5.325529e-34  1.5414487 0.306 0.051 1.457278e-29
## NUPR1     2.847955e-27  1.3163335 0.354 0.090 7.793144e-23
## AEBP1     8.383475e-33  1.2583071 0.609 0.233 2.294054e-28
## S100A1    2.205008e-26  1.1568562 0.549 0.216 6.033784e-22
## MT1F      1.361855e-24  1.1533683 0.434 0.144 3.726580e-20
## TJP2      7.984248e-22  1.1452740 0.279 0.069 2.184810e-17
## RPL11     1.518732e-26  1.0243270 0.502 0.172 4.155857e-22
## MT2P1     2.206938e-26  1.0079400 0.697 0.355 6.039066e-22
## NFASC     2.330822e-21  0.8956373 0.404 0.134 6.378060e-17
## TTN       9.071215e-24  0.8837213 0.667 0.309 2.482247e-19
## ITGB4     1.269255e-18  0.8779393 0.461 0.189 3.473189e-14
## CRYAB     4.352687e-16  0.8480805 0.667 0.385 1.191069e-11
## LINC01094 5.620849e-14  0.8424834 0.306 0.115 1.538089e-09
## CTSH      1.931727e-12  0.8331118 0.320 0.136 5.285977e-08
## C2orf82   5.096135e-16  0.8218179 0.273 0.084 1.394506e-11

DPP10 -> Modulates the activity and gating characteristics of the potassium channel KCND2 NDRG2 -> Contributes to the regulation of the Wnt signaling pathway. Down-regulates CTNNB1-mediated transcriptional activation of target genes, such as CCND1, and may thereby act as tumor suppressor.

'GFAP' %in% row.names(cluster8vs1.markers[cluster8vs1.markers$avg_log2FC > 0,])
## [1] TRUE

GFAP is a know marker for Astrocytes of type 1

Astrocytes type 1 (they promote death and toxiciy)

Microglia

Finally repeating the analysis for clusters 5 and 10

cluster10AND5.markers <- FindMarkers(sn.11.5, ident.1 = c(10, 5), min.pct = 0.25, test.use = "wilcox")
cluster10AND5.markers <- cluster10AND5.markers[order(-cluster10AND5.markers$avg_log2FC), ] 
head(cluster10AND5.markers, n = 20)
##                   p_val avg_log2FC pct.1 pct.2     p_val_adj
## RNA5SP151  0.000000e+00   4.124945 0.515 0.012  0.000000e+00
## LPAR6      0.000000e+00   4.104635 0.596 0.036  0.000000e+00
## CD74       0.000000e+00   4.095964 0.613 0.049  0.000000e+00
## CSF3R      0.000000e+00   4.069473 0.525 0.009  0.000000e+00
## KCNMB1     0.000000e+00   3.960295 0.472 0.013  0.000000e+00
## ITGAX      0.000000e+00   3.865904 0.463 0.007  0.000000e+00
## ADAM28     0.000000e+00   3.797975 0.469 0.009  0.000000e+00
## CSF1R      0.000000e+00   3.758224 0.433 0.007  0.000000e+00
## MED12L     0.000000e+00   3.522321 0.443 0.025  0.000000e+00
## SAT1       0.000000e+00   3.521831 0.786 0.198  0.000000e+00
## P2RY13     0.000000e+00   3.415369 0.342 0.010  0.000000e+00
## RNASET2    0.000000e+00   3.394445 0.405 0.022  0.000000e+00
## SFMBT2    2.220316e-206   3.368780 0.336 0.036 6.075672e-202
## HLA-DRA   8.698232e-297   3.314093 0.332 0.015 2.380184e-292
## MAF       1.981347e-277   3.272968 0.401 0.036 5.421759e-273
## CX3CR1     0.000000e+00   3.267177 0.322 0.005  0.000000e+00
## SRGAP2    1.142723e-236   3.192989 0.377 0.039 3.126948e-232
## CLEC7A     0.000000e+00   3.172228 0.307 0.007  0.000000e+00
## C3        3.053651e-295   3.171522 0.356 0.020 8.356011e-291
## C1QB      5.990104e-306   3.132509 0.315 0.011 1.639132e-301

LPAR6, ITGAX, ADAM28-> Microglia

Genes overexpressed in cluster 10 vs cluster 5

cluster10vs5.markers <- FindMarkers(sn.11.5, ident.1 = 10, ident.2 = 5, min.pct = 0.25, test.use = "wilcox") 
cluster10vs5.markers <- cluster10vs5.markers[order(-cluster10vs5.markers$avg_log2FC),] 
head(cluster10vs5.markers, n = 20)
##                p_val avg_log2FC pct.1 pct.2    p_val_adj
## DST     1.115462e-56   3.189506  0.96 0.104 3.052350e-52
## FGFR3   1.670328e-86   3.081269  0.68 0.008 4.570687e-82
## SLC14A1 2.867634e-78   2.995867  0.64 0.009 7.846993e-74
## IQCA1   4.820062e-70   2.978803  0.60 0.011 1.318962e-65
## SYNE1   5.320494e-61   2.973614  0.66 0.026 1.455900e-56
## MT2A    2.121843e-45   2.905028  0.90 0.121 5.806210e-41
## MT1E    1.942050e-60   2.802417  0.60 0.019 5.314225e-56
## ACOT11  2.071802e-56   2.688174  0.56 0.017 5.669279e-52
## C1orf61 2.849189e-54   2.686989  0.84 0.068 7.796519e-50
## PLXNB1  1.852518e-49   2.609471  0.56 0.025 5.069230e-45
## RYR3    2.800696e-59   2.558244  0.54 0.012 7.663824e-55
## MACF1   7.227341e-37   2.486081  0.84 0.123 1.977690e-32
## AQP4    1.175331e-41   2.456832  0.52 0.028 3.216175e-37
## NDRG2   1.156062e-34   2.439602  0.78 0.106 3.163449e-30
## NTM     1.936312e-58   2.419858  0.52 0.011 5.298525e-54
## COL16A1 1.507053e-47   2.395279  0.48 0.016 4.123899e-43
## LGI4    5.841375e-44   2.392608  0.40 0.009 1.598434e-39
## SDC4    4.612178e-46   2.363473  0.40 0.008 1.262076e-41
## GJA1    1.346061e-50   2.342807  0.60 0.028 3.683362e-46
## NTRK2   3.297522e-57   2.340333  0.58 0.019 9.023339e-53

FGFR3 -> Apoptosis IQCA1 -> Motility

And overexpressed in cluster 5 vs cluster 10

cluster5vs10.markers <- FindMarkers(sn.11.5, ident.1 = 5, ident.2 = 10, min.pct = 0.25, test.use = "wilcox") 
cluster5vs10.markers <- cluster5vs10.markers[order(-cluster5vs10.markers$avg_log2FC),] 
head(cluster5vs10.markers, n = 20)
##                 p_val avg_log2FC pct.1 pct.2    p_val_adj
## CD74     3.333088e-05   1.669590 0.616  0.58 9.120661e-01
## MEF2A    3.557113e-02   1.629902 0.314  0.24 1.000000e+00
## RPS18    8.398421e-03   1.600621 0.339  0.22 1.000000e+00
## SLCO2B1  1.060275e-01   1.492347 0.299  0.26 1.000000e+00
## RPS14    3.835965e-02   1.491329 0.286  0.20 1.000000e+00
## FMNL3    2.172339e-02   1.477008 0.342  0.26 1.000000e+00
## RPL10    2.573786e-02   1.428086 0.412  0.40 1.000000e+00
## C1QB     5.454442e-02   1.426445 0.319  0.26 1.000000e+00
## MAF      4.614209e-02   1.414126 0.401  0.40 1.000000e+00
## TMSB4X   1.280326e-11   1.379628 0.899  0.92 3.503484e-07
## B2M      1.786634e-03   1.377178 0.526  0.48 1.000000e+00
## RPLP2    1.573563e-01   1.351168 0.328  0.32 1.000000e+00
## RPL15    1.165688e-01   1.348350 0.283  0.24 1.000000e+00
## RPL37A   1.943178e-01   1.346073 0.285  0.26 1.000000e+00
## RPL3     9.163406e-02   1.312962 0.386  0.40 1.000000e+00
## MEF2C    1.039114e-01   1.303985 0.386  0.42 1.000000e+00
## RPLP1    1.384900e-01   1.298989 0.365  0.38 1.000000e+00
## ADAP2    9.214586e-02   1.273756 0.266  0.20 1.000000e+00
## SFMBT2   4.265497e-02   1.265804 0.342  0.26 1.000000e+00
## SERPINB9 5.897223e-02   1.259930 0.306  0.22 1.000000e+00

CD74 -> Immune response (MHC complex) MEF2A -> phosphorylated and sumoylated MEF2A represses transcription of NUR77 promoting synaptic differentiation

Microglia supporting neurogenesis

Interestingly MEF2A and CD74 (and many more) are highly correlated with cluster 5! So it may be possibile that cluster 10 is a ‘bad’ cluster. (not meaningful)

Visualizing marker genes

We can plot the expression of the markers with a heatmap

for (feature in c("S100B", "SLC6A11", "GAPDH", "OPALIN", "PTPRZ1", "MEF2A", "CERCAM", "RGS5", 'GFAP', 'SYT1', 'CD74')){
  p <- FeaturePlot(sn.11.5, features = feature, repel = T) 
  plot(p)
}

Or with a violin plot

VlnPlot(sn.11.5, features = "S100B")

VlnPlot(sn.11.5, features = "SLC6A11")

VlnPlot(sn.11.5, features = "GAPDH")

VlnPlot(sn.11.5, features = "OPALIN")

VlnPlot(sn.11.5, features = "PTPRZ1")

VlnPlot(sn.11.5, features = "MEF2A")

VlnPlot(sn.11.5, features = "CERCAM")

VlnPlot(sn.11.5, features = "RGS5")

VlnPlot(sn.11.5, features = "GFAP")

VlnPlot(sn.11.5, features = "SYT1")

VlnPlot(sn.11.5, features = "CD74")

We can visualize all the marker genes and their expression (CPM) in each cluster using a dot plot.

library(ggrepel)
DotPlot(sn.11.5, features = c("S100B", "SLC6A11", "GAPDH", "OPALIN", "PTPRZ1", "MEF2A", "CERCAM", "RGS5", 'GFAP', 'SYT1', 'CD74')) + theme(axis.text.x = element_text(angle = 90))

Final Result

Using marker genes to infere the subtypes of each cluster was quite difficult and needed a deep knowlegde of the fild (which in our case is limited). Eventually we were able to identify 10 different cells types of which 4 subtypes of ODC, 2 subtypes of AST and Microglia, Neuros, OPC ad Endothelial cells.

Comparing the results with the automated pipeline employed by Panglao, we were able to label and cluster the cells with unknown cell type. Also we did not found Pancreatich stellate cells and Pericytes, may be due to different clustering parameters.

new.cluster.ids <- c("OPC AST-precursors", "AST-2", "Satellite ODC", "Perivascular ODC", "OPC", "Microglia", "Intrafascicular ODC", "Endothelial", 'AST-1', 'Neurons', 'Microglia') 
names(new.cluster.ids) <- levels(sn.11.5) 
sn.11.5 <- RenameIdents(sn.11.5, new.cluster.ids) 
DimPlot(sn.11.5, reduction = "umap", label = TRUE, pt.size = 0.5) + NoLegend()